######################
#  Replication code for 'Mediating the Electoral Connection', forthcoming in the JOP
#  John Henderson and John Brooks
#  12/7/2015    
######################    

# table3_iv_fstats.R
#  :: builds the data used to run the IV analysis in Stata for the cluster F-statistics and the Sargan overidentification test p-values
                                                          
# Start from an empty global workspace and run from the replication folder;
# the relative paths used later in this script ('prelimMain.R', 'fstats/')
# resolve against this directory.
rm(list = ls())
setwd('~/Dropbox/rainReplication')

# Install stringr only when it cannot be loaded. requireNamespace() answers
# "is the package usable?" directly; counting rows of installed.packages()
# mis-fires when the package is present in more than one library (count is 2,
# so a "!= 1" test would trigger a needless reinstall).
if (!requireNamespace('stringr', quietly = TRUE)) {
  install.packages('stringr')
}
library(stringr)
 
# For each fixed-effects specification (fes.type = 2, 3): residualize vote and
# dose on the fixed effects, fit the IV model to find which fixed-effect ids
# receive a coefficient, expand the fixed effects into dummy columns, restrict
# the data to the usable rows and write the result to a Stata .dta file.
for (fes.type in 2:3) {
  non.missings <- 4

  # rain_data, covs, full, fe_id_num and ivreg() are used below but are not
  # defined in this file, so they must be provided by prelimMain.R.
  # NOTE(review): presumably prelimMain.R reads fes.type and non.missings to
  # build them -- confirm against that script.
  source('prelimMain.R')

  # Residualize vote and dose on the fixed effects here, since running the
  # fixed-effects IV in Stata is slow.
  vts <- lm(vote ~ as.factor(fe_id_num), subset = full, data = covs)
  dse <- lm(dose ~ as.factor(fe_id_num), subset = full, data = covs)

  # Residual names are the row labels of the rows used in each fit; they are
  # converted to numbers and used as positions, so rows outside the fit stay NA.
  # ($residuals is spelled out rather than relying on partial matching of $res.)
  dose_iv <- vote_iv <- array(NA, nrow(rain_data))
  dose_iv[as.numeric(names(dse$residuals))] <- dse$residuals
  vote_iv[as.numeric(names(vts$residuals))] <- vts$residuals

  covs$dose_iv <- dose_iv
  covs$vote_iv <- vote_iv

  alts <- TRUE
  if (alts) {
    # IV model: dose is the only regressor missing from the instrument list;
    # rain_day and rain_day_prev appear only in the instrument list.
    main_iv1_fe <- ivreg(
      vote ~ as.factor(fe_id_num) + d_inc +
        dist_prev + midterm + pres_party +
        black + construction + educ +
        minc + farmer + forborn + gvtwkr + manuf + pop + unempld +
        urban + retail + sos + gov + comp_cq + redistricted +
        dose + dose_prv + vote_prv,
      ~ as.factor(fe_id_num) + d_inc +
        dist_prev + midterm + pres_party +
        black + construction + educ +
        minc + farmer + forborn + gvtwkr + manuf + pop + unempld +
        urban + retail + sos + gov + comp_cq + redistricted +
        dose_prv + vote_prv + rain_day + rain_day_prev,
      subset = full, data = covs
    )

    # Recover the fixed-effect ids that have a coefficient. The prefix
    # "as.factor(fe_id_num)" is 20 characters long, so the id starts at
    # character 21 of the coefficient name.
    coef_names <- names(main_iv1_fe$coefficients)
    fe_coef_names <- coef_names[which(str_sub(coef_names, 1, 5) == "as.fa")]
    nms <- as.numeric(str_sub(fe_coef_names, 21))

    # incs flags (1/0) the rows whose fixed-effect id is among those ids.
    # NAs are removed from nms so that an unparsable name can never match a
    # missing fe_id_num (matching the behaviour of an element-wise `==`).
    # NOTE(review): under default treatment contrasts the reference level has
    # no coefficient, so its rows are not flagged -- confirm this is intended.
    incs <- array(0, nrow(rain_data))
    incs[which(fe_id_num %in% nms[!is.na(nms)])] <- 1
  }

  covs$fe_id_num <- fe_id_num

  # Drop any column whose name matches "as.factor" (this covers the exact
  # 'as.factor(fe_id_num)' column as well). The guard matters: indexing with
  # -integer(0) selects *zero* columns and would silently empty covs.
  ix <- grep(names(covs), pattern = "as.factor")
  if (length(ix) > 0) {
    covs <- covs[, -ix, drop = FALSE]
  }

  if (alts) {
    # One 0/1 indicator column per distinct fixed-effect id, named FEV1, FEV2, ...
    # A missing id never equals anything, so its column stays all zero.
    un_fe_id_num <- unique(fe_id_num)
    fe_mat <- matrix(0, nrow(covs), length(un_fe_id_num))
    for (j in seq_along(un_fe_id_num)) {
      ix <- which(fe_id_num == un_fe_id_num[j])
      fe_mat[ix, j] <- 1
    }
    fe_mat <- as.data.frame(fe_mat)
    names(fe_mat) <- paste0('FE', names(fe_mat))
    covs <- cbind(covs, fe_mat)
  }

  if (alts) {
    # adjust full: keep only the rows flagged in incs
    full <- full[which(incs[full] == 1)]

    # Remove columns whose sum (ignoring NAs) is zero. Guarded for the same
    # reason as above: with no such column, an unguarded negative index would
    # drop every column.
    zero_cols <- which(colSums(covs, na.rm = TRUE) == 0)
    if (length(zero_cols) > 0) {
      covs <- covs[, -zero_cols, drop = FALSE]
    }
    covs <- as.data.frame(covs)
  }

  covs <- covs[full, , drop = FALSE]

  # Export for Stata; the file name encodes non.missings and fes.type.
  library(foreign)
  write.dta(covs, file = paste0('fstats/covs_', non.missings, '_', fes.type, '.dta'))
}
# END
# END      